// FOptionContainer class - contains the options for a filter group,
// including the banned/grey/exception site lists and the content/site/url regexp lists

//Please refer to http://dansguardian.org/?page=copyright2
//for the license for this code.
//Written by Daniel Barron (daniel@//jadeb/.com).
//For support go to http://groups.yahoo.com/group/dansguardian

//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


// INCLUDES

#ifdef HAVE_CONFIG_H
#include "../mind_config.h"
#endif
#include "FOptionContainer.hpp"
#include "OptionContainer.hpp"
#include "Logger.hpp"

#include <cstdlib>
#include <iostream>
#include <fstream>
#include <netdb.h>		// for gethostby
#include <netinet/in.h>		// for address structures
#include <arpa/inet.h>		// for inet_aton()
#include <sys/socket.h>


// GLOBALS

// Defined elsewhere. Consulted in read() before writing exception text to
// std::cerr (output is only attempted while this is false).
extern bool is_daemonised;
// Defined elsewhere. Global option container; this file uses its list manager
// (o.lm), default HTML template, language path and group-name-list setting.
extern OptionContainer o;
// Defined elsewhere. Logger used for all error reporting via writeToLog().
extern Logger log;

// IMPLEMENTATION

// reverse DNS lookup on IP. be aware that this can return multiple results, unlike a standard lookup.

std::deque<String> * ipToHostname(const char *ip) {
    std::deque<String> *result = new std::deque<String>;
    struct in_addr address, **addrptr;
    if (inet_aton(ip, &address)) { // convert to in_addr
        struct hostent *answer;
        answer = gethostbyaddr((char *) & address, sizeof (address), AF_INET);
        if (answer) { // sucess in reverse dns
            result->push_back(String(answer->h_name));
            for (addrptr = (struct in_addr **) answer->h_addr_list; *addrptr; addrptr++) {
                result->push_back(String(inet_ntoa(**addrptr)));
            }
        }
    }
    return result;
}

// Destruction simply runs reset(), which de-references every list this group
// holds, empties its containers and deletes the per-group banned page.
FOptionContainer::~FOptionContainer() {
    this->reset();
}

void FOptionContainer::reset() {
    if (banned_phrase_flag) o.lm.deRefList(banned_phrase_list);
    if (exception_site_flag) o.lm.deRefList(exception_site_list);
    if (exception_url_flag) o.lm.deRefList(exception_url_list);
    if (banned_extension_flag) o.lm.deRefList(banned_extension_list);
    if (banned_mimetype_flag) o.lm.deRefList(banned_mimetype_list);
    if (banned_site_flag) o.lm.deRefList(banned_site_list);
    if (banned_url_flag) o.lm.deRefList(banned_url_list);
    if (grey_site_flag) o.lm.deRefList(grey_site_list);
    if (grey_url_flag) o.lm.deRefList(grey_url_list);
    if (banned_regexpurl_flag) o.lm.deRefList(banned_regexpurl_list);
    if (exception_regexpurl_flag) o.lm.deRefList(exception_regexpurl_list);
    if (banned_regexpheader_flag) o.lm.deRefList(banned_regexpheader_list);
    if (content_regexp_flag) o.lm.deRefList(content_regexp_list);
    if (url_regexp_flag) o.lm.deRefList(url_regexp_list);
    if (header_regexp_flag) o.lm.deRefList(header_regexp_list);
    if (exception_extension_flag) o.lm.deRefList(exception_extension_list);
    if (exception_mimetype_flag) o.lm.deRefList(exception_mimetype_list);
    if (exception_file_site_flag) o.lm.deRefList(exception_file_site_list);
    if (exception_file_url_flag) o.lm.deRefList(exception_file_url_list);
    if (log_site_flag) o.lm.deRefList(log_site_list);
    if (log_url_flag) o.lm.deRefList(log_url_list);
    if (log_regexpurl_flag) o.lm.deRefList(log_regexpurl_list);

    banned_phrase_flag = false;
    exception_site_flag = false;
    exception_url_flag = false;
    banned_extension_flag = false;
    banned_mimetype_flag = false;
    banned_site_flag = false;
    banned_url_flag = false;
    grey_site_flag = false;
    grey_url_flag = false;
    banned_regexpurl_flag = false;
    exception_regexpurl_flag = false;
    banned_regexpheader_flag = false;
    content_regexp_flag = false;
    url_regexp_flag = false;
    header_regexp_flag = false;
    exception_extension_flag = false;
    exception_mimetype_flag = false;
    exception_file_site_flag = false;
    exception_file_url_flag = false;
    log_site_flag = false;
    log_url_flag = false;
    log_regexpurl_flag = false;
    block_downloads = false;
    banned_phrase_list_index.clear();
    conffile.clear();
    content_regexp_list_comp.clear();
    content_regexp_list_rep.clear();
    url_regexp_list_comp.clear();
    url_regexp_list_rep.clear();
    header_regexp_list_comp.clear();
    header_regexp_list_rep.clear();
    banned_regexpurl_list_comp.clear();
    banned_regexpurl_list_source.clear();
    banned_regexpurl_list_ref.clear();
    exception_regexpurl_list_comp.clear();
    exception_regexpurl_list_source.clear();
    exception_regexpurl_list_ref.clear();
    banned_regexpheader_list_comp.clear();
    banned_regexpheader_list_source.clear();
    banned_regexpheader_list_ref.clear();
    log_regexpurl_list_comp.clear();
    log_regexpurl_list_source.clear();
    log_regexpurl_list_ref.clear();
    delete banned_page;
    banned_page = NULL;
}

// Grab this filter group's HTML template: the group-specific banned page when
// one has been loaded, otherwise the global template held by the option container.

HTMLTemplate* FOptionContainer::getHTMLTemplate() {
    return banned_page ? banned_page : &(o.html_template);
}

// Read in the given list file and store the list's ID in *whichlist.
// The list is sorted using startsWith or endsWith ordering depending on sortsw,
// and a cache file is created when both `cache` and the createlistcachefiles
// setting are true. listname is only used in error messages.
//
// Returns false if the list manager cannot open the list or if writing the
// cache file fails; true otherwise. A list the manager already marks as used
// is neither sorted nor cached again.

bool FOptionContainer::readFile(const char *filename, unsigned int* whichlist, bool sortsw, bool cache, const char *listname) {
    const int listIndex = o.lm.newItemList(filename, sortsw, 1, true);
    if (listIndex < 0) {
        log.writeToLog(1, "Error opening %s", listname);
        return false;
    }
    *whichlist = (unsigned) listIndex;

    // Nothing further to do for a list that has already been prepared.
    if (o.lm.l[*whichlist]->used) {
        return true;
    }

    o.lm.l[*whichlist]->doSort(sortsw);
    if (cache && createlistcachefiles && !o.lm.l[*whichlist]->createCacheFile()) {
        return false;
    }
    o.lm.l[*whichlist]->used = true;
    return true;
}

bool FOptionContainer::read(const char *filename) {
    try { // all sorts of exceptions could occur reading conf files
        std::string linebuffer;
        String temp; // for tempory conversion and storage
        std::ifstream conffiles(filename, std::ios::in); // mindfN.conf
        if (!conffiles.good()) {
            log.writeToLog(1, "Error reading %s", filename);
            return false;
        }
        while (!conffiles.eof()) {
            getline(conffiles, linebuffer);
            if (!conffiles.eof() && linebuffer.length() != 0) {
                if (linebuffer[0] != '#') { // i.e. not commented out
                    temp = (char *) linebuffer.c_str();
                    if (temp.contains("#")) {
                        temp = temp.before("#");
                    }
                    temp.removeWhiteSpace(); // get rid of spaces at end of line
                    linebuffer = temp.toCharArray();
                    conffile.push_back(linebuffer); // stick option in deque
                }
            }
        }
        conffiles.close();


#ifdef MIND_DEBUG
        std::cout << "Read conf into memory: " << filename << std::endl;
#endif

        if (findoptionS("deepurlanalysis") == "on") {
            deep_url_analysis = true;
        } else {
            deep_url_analysis = false;
        }

        if (findoptionS("disablecontentscan") == "on") {
            disable_content_scan = true;
        } else {
            disable_content_scan = false;
        }

        // override default reporting level
        String temp_reporting_level(findoptionS("reportinglevel"));
        if (temp_reporting_level != "") {
            reporting_level = temp_reporting_level.toInteger();
            if ((reporting_level < -1) || (reporting_level > 3)) {
                log.writeToLog(1, "Invalid reportinglevel: %d", reporting_level);
                return false;
            }
        }

        // override default access denied address
        if (reporting_level == 1 || reporting_level == 2) {
            String temp_ada, temp_add;
            temp_ada = findoptionS("accessdeniedaddress");
            if (temp_ada != "") {
                access_denied_address = temp_ada.toCharArray();
                access_denied_domain = access_denied_address.c_str();
                access_denied_domain = access_denied_domain.after("://");
                access_denied_domain.removeWhiteSpace();
                if (access_denied_domain.contains("/")) {
                    access_denied_domain = access_denied_domain.before("/");
                    // access_denied_domain now contains the FQ host name of the
                    // server that serves the accessdenied.html file
                }
                if (access_denied_domain.contains(":")) {
                    access_denied_domain = access_denied_domain.before(":"); // chop off the port number if any
                }
            }
        }            // override default banned page
        else if (reporting_level == 3) {
            String html_template(findoptionS("htmltemplate"));
            if (html_template != "") {
                html_template = o.languagepath + html_template;
                banned_page = new HTMLTemplate;
                if (!(banned_page->readTemplateFile(html_template.toCharArray()))) {
                    log.writeToLog(1, "Error reading HTML Template file: %s", html_template.toCharArray());
                    return false;
                }
            }
        }

        // group mode: 0 = banned, 1 = filtered, 2 = exception
        group_mode = findoptionI("groupmode");
        if ((group_mode < 0) || (group_mode > 2)) {
            log.writeToLog(1, "Invalid groupmode");
            return false;
        }
#ifdef MIND_DEBUG
        std::cout << "Group mode: " << group_mode << std::endl;
#endif

        // grab group name (if not using external group names file)
        if (!o.use_group_names_list) {
            name = findoptionS("groupname");
#ifdef MIND_DEBUG
            std::cout << "Group name: " << name << std::endl;
#endif
        }

        if (group_mode == 1) {

            embedded_url_weight = findoptionI("embeddedurlweight");
#ifdef MIND_DEBUG
            std::cout << "Embedded URL Weight: " << embedded_url_weight << std::endl;
#endif

            category_threshold = findoptionI("categorydisplaythreshold");
#ifdef MIND_DEBUG
            std::cout << "Category display threshold: " << category_threshold << std::endl;
#endif

            // the mind.conf and pics files get amalgamated into one
            // deque.  They are only seperate files for clarity.

            if (findoptionS("enablepics") == "on") {
                enable_PICS = true;
            } else {
                enable_PICS = false;
            }

            if (findoptionS("blockdownloads") == "on") {
                block_downloads = true;
            }

            if (enable_PICS) {
                linebuffer = findoptionS("picsfile");
                std::ifstream picsfiles(linebuffer.c_str(), std::ios::in); // pics file
                if (!picsfiles.good()) {
                    log.writeToLog(1, "Error reading PICS file: %s", linebuffer.c_str());
                    return false;
                }
                while (!picsfiles.eof()) {
                    getline(picsfiles, linebuffer);
                    if (!picsfiles.eof() && linebuffer.length() != 0) {
                        if (linebuffer[0] != '#') { // i.e. not commented out
                            temp = (char *) linebuffer.c_str();
                            if (temp.contains("#")) {
                                temp = temp.before("#");
                            }
                            while (temp.endsWith(" ")) {
                                temp.chop(); // get rid of spaces at end of line
                            }
                            linebuffer = temp.toCharArray();
                            conffile.push_back(linebuffer); // stick option in deque
                        }
                    }
                }
                picsfiles.close();

#ifdef MIND_DEBUG
                std::cout << "Read PICS into memory" << std::endl;
            } else {
                std::cout << "PICS disabled" << std::endl;
#endif
            }

            naughtyness_limit = findoptionI("naughtynesslimit");
            if (!realitycheck(naughtyness_limit, 1, 0, "naughtynesslimit")) {
                return false;
            }
            std::string exception_phrase_list_location(findoptionS("exceptionphraselist"));
            std::string weighted_phrase_list_location(findoptionS("weightedphraselist"));
            std::string banned_phrase_list_location(findoptionS("bannedphraselist"));
            std::string banned_extension_list_location(findoptionS("bannedextensionlist"));
            std::string banned_mimetype_list_location(findoptionS("bannedmimetypelist"));
            std::string banned_site_list_location(findoptionS("bannedsitelist"));
            std::string banned_url_list_location(findoptionS("bannedurllist"));
            std::string grey_site_list_location(findoptionS("greysitelist"));
            std::string grey_url_list_location(findoptionS("greyurllist"));
            std::string banned_regexpurl_list_location(findoptionS("bannedregexpurllist"));
            std::string exception_regexpurl_list_location(findoptionS("exceptionregexpurllist"));
            std::string banned_regexpheader_list_location(findoptionS("bannedregexpheaderlist"));
            std::string content_regexp_list_location(findoptionS("contentregexplist"));
            std::string url_regexp_list_location(findoptionS("urlregexplist"));
            std::string header_regexp_list_location(findoptionS("headerregexplist"));
            std::string exceptions_site_list_location(findoptionS("exceptionsitelist"));
            std::string exceptions_url_list_location(findoptionS("exceptionurllist"));
            std::string exception_extension_list_location(findoptionS("exceptionextensionlist"));
            std::string exception_mimetype_list_location(findoptionS("exceptionmimetypelist"));
            std::string exception_file_site_list_location(findoptionS("exceptionfilesitelist"));
            std::string exception_file_url_list_location(findoptionS("exceptionfileurllist"));
            std::string log_url_list_location(findoptionS("logurllist"));
            std::string log_site_list_location(findoptionS("logsitelist"));
            std::string log_regexpurl_list_location(findoptionS("logregexpurllist"));

            forceGoogleSafeSearch = findoptionS("forcegooglesafesearch");

            if (forceGoogleSafeSearch == "")
                forceGoogleSafeSearch = "off";
            
            if (enable_PICS) {
                pics_rsac_nudity = findoptionI("RSACnudity");
                pics_rsac_language = findoptionI("RSAClanguage");
                pics_rsac_sex = findoptionI("RSACsex");
                pics_rsac_violence = findoptionI("RSACviolence");
                pics_evaluweb_rating = findoptionI("evaluWEBrating");
                pics_cybernot_sex = findoptionI("CyberNOTsex");
                pics_cybernot_other = findoptionI("CyberNOTother");
                pics_safesurf_agerange = findoptionI("SafeSurfagerange");
                pics_safesurf_profanity = findoptionI("SafeSurfprofanity");
                pics_safesurf_heterosexualthemes = findoptionI("SafeSurfheterosexualthemes");
                pics_safesurf_homosexualthemes = findoptionI("SafeSurfhomosexualthemes");
                pics_safesurf_nudity = findoptionI("SafeSurfnudity");
                pics_safesurf_violence = findoptionI("SafeSurfviolence");
                pics_safesurf_sexviolenceandprofanity = findoptionI("SafeSurfsexviolenceandprofanity");
                pics_safesurf_intolerance = findoptionI("SafeSurfintolerance");
                pics_safesurf_druguse = findoptionI("SafeSurfdruguse");
                pics_safesurf_otheradultthemes = findoptionI("SafeSurfotheradultthemes");
                pics_safesurf_gambling = findoptionI("SafeSurfgambling");
                pics_icra_chat = findoptionI("ICRAchat");
                pics_icra_moderatedchat = findoptionI("ICRAmoderatedchat");
                pics_icra_languagesexual = findoptionI("ICRAlanguagesexual");
                pics_icra_languageprofanity = findoptionI("ICRAlanguageprofanity");
                pics_icra_languagemildexpletives = findoptionI("ICRAlanguagemildexpletives");
                pics_icra_nuditygraphic = findoptionI("ICRAnuditygraphic");
                pics_icra_nuditymalegraphic = findoptionI("ICRAnuditymalegraphic");
                pics_icra_nudityfemalegraphic = findoptionI("ICRAnudityfemalegraphic");
                pics_icra_nuditytopless = findoptionI("ICRAnuditytopless");
                pics_icra_nuditybottoms = findoptionI("ICRAnuditybottoms");
                pics_icra_nuditysexualacts = findoptionI("ICRAnuditysexualacts");
                pics_icra_nudityobscuredsexualacts = findoptionI("ICRAnudityobscuredsexualacts");
                pics_icra_nuditysexualtouching = findoptionI("ICRAnuditysexualtouching");
                pics_icra_nuditykissing = findoptionI("ICRAnuditykissing");
                pics_icra_nudityartistic = findoptionI("ICRAnudityartistic");
                pics_icra_nudityeducational = findoptionI("ICRAnudityeducational");
                pics_icra_nuditymedical = findoptionI("ICRAnuditymedical");
                pics_icra_drugstobacco = findoptionI("ICRAdrugstobacco");
                pics_icra_drugsalcohol = findoptionI("ICRAdrugsalcohol");
                pics_icra_drugsuse = findoptionI("ICRAdrugsuse");
                pics_icra_gambling = findoptionI("ICRAgambling");
                pics_icra_weaponuse = findoptionI("ICRAweaponuse");
                pics_icra_intolerance = findoptionI("ICRAintolerance");
                pics_icra_badexample = findoptionI("ICRAbadexample");
                pics_icra_pgmaterial = findoptionI("ICRApgmaterial");
                pics_icra_violenceobjects = findoptionI("ICRAviolenceobjects");
                pics_icra_violencerape = findoptionI("ICRAviolencerape");
                pics_icra_violencetohumans = findoptionI("ICRAviolencetohumans");
                pics_icra_violencetoanimals = findoptionI("ICRAviolencetoanimals");
                pics_icra_violencetofantasy = findoptionI("ICRAviolencetofantasy");
                pics_icra_violencekillinghumans = findoptionI("ICRAviolencekillinghumans");
                pics_icra_violencekillinganimals = findoptionI("ICRAviolencekillinganimals");
                pics_icra_violencekillingfantasy = findoptionI("ICRAviolencekillingfantasy");
                pics_icra_violenceinjuryhumans = findoptionI("ICRAviolenceinjuryhumans");
                pics_icra_violenceinjuryanimals = findoptionI("ICRAviolenceinjuryanimals");
                pics_icra_violenceinjuryfantasy = findoptionI("ICRAviolenceinjuryfantasy");
                pics_icra_violenceartisitic = findoptionI("ICRAviolenceartisitic");
                pics_icra_violenceeducational = findoptionI("ICRAviolenceeducational");
                pics_icra_violencemedical = findoptionI("ICRAviolencemedical");
                pics_icra_violencesports = findoptionI("ICRAviolencesports");
                pics_weburbia_rating = findoptionI("Weburbiarating");
                pics_vancouver_multiculturalism = findoptionI("Vancouvermulticulturalism");
                pics_vancouver_educationalcontent = findoptionI("Vancouvereducationalcontent");
                pics_vancouver_environmentalawareness = findoptionI("Vancouverenvironmentalawareness");
                pics_vancouver_tolerance = findoptionI("Vancouvertolerance");
                pics_vancouver_violence = findoptionI("Vancouverviolence");
                pics_vancouver_sex = findoptionI("Vancouversex");
                pics_vancouver_profanity = findoptionI("Vancouverprofanity");
                pics_vancouver_safety = findoptionI("Vancouversafety");
                pics_vancouver_canadiancontent = findoptionI("Vancouvercanadiancontent");
                pics_vancouver_commercialcontent = findoptionI("Vancouvercommercialcontent");
                pics_vancouver_gambling = findoptionI("Vancouvergambling");

                // new Korean PICS support
                pics_icec_rating = findoptionI("ICECrating");
                pics_safenet_nudity = findoptionI("SafeNetnudity");
                pics_safenet_language = findoptionI("SafeNetlanguage");
                pics_safenet_sex = findoptionI("SafeNetsex");
                pics_safenet_violence = findoptionI("SafeNetviolence");
                pics_safenet_gambling = findoptionI("SafeNetgambling");
                pics_safenet_alcoholtobacco = findoptionI("SafeNetalcoholtobacco");
            }
#ifdef MIND_DEBUG
            else
                std::cout << "PICS disabled; options skipped" << std::endl;
#endif

#ifdef MIND_DEBUG
            std::cout << "Read settings into memory" << std::endl;
            std::cout << "Reading phrase, URL and site lists into memory" << std::endl;
#endif

            if (!block_downloads) {
#ifdef MIND_DEBUG
                std::cout << "Blanket download block disabled; using standard banned file lists" << std::endl;
#endif
                if (!readFile(banned_extension_list_location.c_str(), &banned_extension_list, false, false, "bannedextensionlist")) {
                    return false;
                } // file extensions
                banned_extension_flag = true;
                if (!readFile(banned_mimetype_list_location.c_str(), &banned_mimetype_list, false, true, "bannedmimetypelist")) {
                    return false;
                } // mime types
                banned_mimetype_flag = true;
            }
            if (!readFile(exception_extension_list_location.c_str(), &exception_extension_list, false, false, "exceptionextensionlist")) {
                return false;
            } // file extensions
            exception_extension_flag = true;
            if (!readFile(exception_mimetype_list_location.c_str(), &exception_mimetype_list, false, true, "exceptionmimetypelist")) {
                return false;
            } // mime types
            exception_mimetype_flag = true;
            if (!readFile(exception_file_site_list_location.c_str(), &exception_file_site_list, false, true, "exceptionfilesitelist")) {
                return false;
            } // download site exceptions
            exception_file_site_flag = true;
            if (!readFile(exception_file_url_list_location.c_str(), &exception_file_url_list, true, true, "exceptionfileurllist")) {
                return false;
            } // download site exceptions
            exception_file_url_flag = true;

            if (!readbplfile(banned_phrase_list_location.c_str(), exception_phrase_list_location.c_str(), weighted_phrase_list_location.c_str())) {
                return false;
            } // read banned, exception, weighted phrase list
            banned_phrase_flag = true;
            if (!readFile(exceptions_site_list_location.c_str(), &exception_site_list, false, true, "exceptionsitelist")) {
                return false;
            } // site exceptions
            exception_site_flag = true;
            if (!readFile(exceptions_url_list_location.c_str(), &exception_url_list, true, true, "exceptionurllist")) {
                return false;
            } // url exceptions
            exception_url_flag = true;
            if (!readFile(banned_site_list_location.c_str(), &banned_site_list, false, true, "bannedsitelist")) {
                return false;
            } // banned domains
            banned_site_flag = true;
            if (!readFile(banned_url_list_location.c_str(), &banned_url_list, true, true, "bannedurllist")) {
                return false;
            } // banned urls
            banned_url_flag = true;
            if (!readFile(grey_site_list_location.c_str(), &grey_site_list, false, true, "greysitelist")) {
                return false;
            } // grey domains
            grey_site_flag = true;
            if (!readFile(grey_url_list_location.c_str(), &grey_url_list, true, true, "greyurllist")) {
                return false;
            } // grey urls
            grey_url_flag = true;

            // log-only lists
            if (log_url_list_location.length() && readFile(log_url_list_location.c_str(), &log_url_list, true, true, "logurllist")) {
                log_url_flag = true;
#ifdef MIND_DEBUG
                std::cout << "Enabled log-only URL list" << std::endl;
#endif
            }
            if (log_site_list_location.length() && readFile(log_site_list_location.c_str(), &log_site_list, false, true, "logsitelist")) {
                log_site_flag = true;
#ifdef MIND_DEBUG
                std::cout << "Enabled log-only domain list" << std::endl;
#endif
            }
            if (log_regexpurl_list_location.length() && readRegExMatchFile(log_regexpurl_list_location.c_str(), "logregexpurllist", log_regexpurl_list,
                    log_regexpurl_list_comp, log_regexpurl_list_source, log_regexpurl_list_ref)) {
                log_regexpurl_flag = true;
#ifdef MIND_DEBUG
                std::cout << "Enabled log-only RegExp URL list" << std::endl;
#endif
            }

            if (!readRegExMatchFile(banned_regexpurl_list_location.c_str(), "bannedregexpurllist", banned_regexpurl_list,
                    banned_regexpurl_list_comp, banned_regexpurl_list_source, banned_regexpurl_list_ref)) {
                return false;
            } // banned reg exp urls
            banned_regexpurl_flag = true;

            if (!readRegExMatchFile(exception_regexpurl_list_location.c_str(), "exceptionregexpurllist", exception_regexpurl_list,
                    exception_regexpurl_list_comp, exception_regexpurl_list_source, exception_regexpurl_list_ref)) {
                return false;
            } // exception reg exp urls
            exception_regexpurl_flag = true;

            if (!readRegExMatchFile(banned_regexpheader_list_location.c_str(), "bannedregexpheaderlist", banned_regexpheader_list,
                    banned_regexpheader_list_comp, banned_regexpheader_list_source, banned_regexpheader_list_ref)) {
                return false;
            } // banned reg exp headers
            banned_regexpheader_flag = true;

            if (!readRegExReplacementFile(content_regexp_list_location.c_str(), "contentregexplist", content_regexp_list, content_regexp_list_rep, content_regexp_list_comp)) {
                return false;
            } // content replacement regular expressions
            content_regexp_flag = true;

            if (!readRegExReplacementFile(url_regexp_list_location.c_str(), "urlregexplist", url_regexp_list, url_regexp_list_rep, url_regexp_list_comp)) {
                return false;
            } // url replacement regular expressions
            url_regexp_flag = true;

            if (!readRegExReplacementFile(header_regexp_list_location.c_str(), "headerregexplist", header_regexp_list, header_regexp_list_rep, header_regexp_list_comp)) {
                return false;
            } // header replacement regular expressions
            header_regexp_flag = true;
#ifdef MIND_DEBUG
            std::cout << "Lists in memory" << std::endl;
#endif
        }

        if (!precompileregexps()) {
            return false;
        } // precompiled reg exps for speed

        //
        //
        // Bypass/infection bypass modes
        //
        //

        bypass_mode = findoptionI("bypass");
        if (!realitycheck(bypass_mode, -1, 0, "bypass")) {
            return false;
        }
        // we use the "magic" key here both for filter bypass *and* for filter bypass after virus scan (fancy DM).
        if ((bypass_mode != 0) || (disable_content_scan != 1)) {
            magic = findoptionS("bypasskey");
            if (magic.length() < 9) {
                std::string s(16u, ' ');
                for (int i = 0; i < 16; i++) {
                    s[i] = (rand() % 26) + 'A';
                }
                magic = s;
            }
#ifdef MIND_DEBUG
            std::cout << "Setting magic key to '" << magic << "'" << std::endl;
#endif
            // Create the Bypass Cookie magic key
            cookie_magic = std::string(16u, ' ');
            for (int i = 0; i < 16; i++) {
                cookie_magic[i] = (rand() % 26) + 'A';
            }
        }

        infection_bypass_mode = findoptionI("infectionbypass");
        if (!realitycheck(infection_bypass_mode, -1, 0, "infectionbypass")) {
            return false;
        }
        if (infection_bypass_mode != 0) {
            imagic = findoptionS("infectionbypasskey");
            if (imagic.length() < 9) {
                std::string s(16u, ' ');
                for (int i = 0; i < 16; i++) {
                    s[i] = (rand() % 26) + 'A';
                }
                imagic = s;
            }
#ifdef MIND_DEBUG
            std::cout << "Setting imagic key to '" << imagic << "'" << std::endl;
#endif
            if (findoptionS("infectionbypasserrorsonly") == "off") {
                infection_bypass_errors_only = false;
            } else {
#ifdef MIND_DEBUG
                std::cout << "Only allowing infection bypass on scan error" << std::endl;
#endif
                infection_bypass_errors_only = true;
            }
        }
    } catch (std::exception & e) {
        if (!is_daemonised) {
            std::cerr << e.what() << std::endl; // when called the daemon has not
            // detached so we can do this
        }
        return false;
    }
    return true;
}

// Load the exception, banned and (optionally) weighted phrase lists into a
// single phrase list obtained from the list manager, whose ID is stored in
// banned_phrase_list.
//
// If the manager reports the list as already used, nothing is re-read.
// Otherwise the exception list is read first, then the banned list, then the
// weighted list (only when weighted_phrase_mode is above zero), and finally
// the search graph is built. Returns false on the first step that fails.
bool FOptionContainer::readbplfile(const char *banned, const char *exception, const char *weighted) {

    const int listId = o.lm.newPhraseList(exception, banned, weighted);
    if (listId < 0) {
        log.writeToLog(1, "%s", "Error opening phraselists");
        return false;
    }
    banned_phrase_list = listId;

    // An already-populated list needs no further work.
    if (o.lm.l[banned_phrase_list]->used) {
        return true;
    }

#ifdef MIND_DEBUG
    std::cout << "Reading new phrase lists" << std::endl;
#endif
    if (!o.lm.l[banned_phrase_list]->readPhraseList(exception, true)) {
        log.writeToLog(1, "%s", "Error opening exceptionphraselist");
        return false;
    }

    if (!o.lm.l[banned_phrase_list]->readPhraseList(banned, false, -1, -1, false)) {
        log.writeToLog(1, "%s", "Error opening bannedphraselist");
        return false;
    }

    if (weighted_phrase_mode > 0) { // if zero wpl is deactivated
#ifdef MIND_DEBUG
        std::cout << "Reading weighted phrase list" << std::endl;
#endif
        if (!o.lm.l[banned_phrase_list]->readPhraseList(weighted, false, -1, -1, false)) {
            log.writeToLog(1, "%s", "Error opening weightedphraselist");
            return false;
        }
    }

    if (!o.lm.l[banned_phrase_list]->makeGraph(force_quick_search)) {
        return false;
    }

    o.lm.l[banned_phrase_list]->used = true;
    return true;
}

// read regexp url list

// Open the item list stored in 'filename', record its index in 'listref', and
// compile its regular expressions into the supplied deques.
// 'listname' is only used to label the error message when the list cannot be
// opened. Returns false on open or compile failure.
bool FOptionContainer::readRegExMatchFile(const char *filename, const char *listname, unsigned int& listref,
        std::deque<RegExp> &list_comp, std::deque<String> &list_source, std::deque<unsigned int> &list_ref) {
    const int index = o.lm.newItemList(filename, true, 32, true);
    if (index >= 0) {
        listref = static_cast<unsigned int>(index);
        return compileRegExMatchFile(listref, list_comp, list_source, list_ref);
    }
    log.writeToLog(1, "Error opening %s", listname);
    return false;
}

bool FOptionContainer::compileRegExMatchFile(unsigned int list, std::deque<RegExp> &list_comp,
        std::deque<String> &list_source, std::deque<unsigned int> &list_ref) {
    for (unsigned int i = 0; i < (*o.lm.l[list]).morelists.size(); i++) {
        if (!compileRegExMatchFile((*o.lm.l[list]).morelists[i], list_comp, list_source, list_ref)) {
            return false;
        }
    }
    RegExp r;
    bool rv = true;
    int len = (*o.lm.l[list]).getListLength();
    String source;
    for (int i = 0; i < len; i++) {
        source = (*o.lm.l[list]).getItemAtInt(i).c_str();
        rv = r.comp(source.toCharArray());
        if (rv == false) {
            log.writeToLog(1, "%s", "Error compiling regexp:");
            log.writeToLog(1, "%s", source.toCharArray());
            return false;
        }
        list_comp.push_back(r);
        list_source.push_back(source);
        list_ref.push_back(list);
    }
    (*o.lm.l[list]).used = true;
    return true;
}

// content and URL regular expression replacement files

bool FOptionContainer::readRegExReplacementFile(const char *filename, const char *listname, unsigned int& listid,
        std::deque<String> &list_rep, std::deque<RegExp> &list_comp) {
    int result = o.lm.newItemList(filename, true, 32, true);
    if (result < 0) {
        log.writeToLog(1, "Error opening %s", listname);
        return false;
    }
    listid = (unsigned) result;
    if (!(*o.lm.l[listid]).used) {
        //(*o.lm.l[listid]).doSort(true);
        (*o.lm.l[listid]).used = true;
    }
    RegExp r;
    bool rv = true;
    String regexp;
    String replacement;
    for (int i = 0; i < (*o.lm.l[listid]).getListLength(); i++) {
        regexp = (*o.lm.l[listid]).getItemAtInt(i).c_str();
        replacement = regexp.after("\"->\"");
        while (!replacement.endsWith("\"")) {
            if (replacement.length() < 2) {
                break;
            }
            replacement.chop();
        }
        replacement.chop();
        regexp = regexp.after("\"").before("\"->\"");
        //        if (replacement.length() < 1 || regexp.length() < 1) {
        if (regexp.length() < 1) { // allow replace with nothing
            continue;
        }
        rv = r.comp(regexp.toCharArray());
        if (rv == false) {
            log.writeToLog(1, "%s", "Error compiling regexp: ");
            log.writeToLog(1, "%s", (*o.lm.l[listid]).getItemAtInt(i).c_str());
            return false;
        }
        list_comp.push_back(r);
        list_rep.push_back(replacement);
    }
    return true;
}

// Recursively check site & URL lists for blanket matches

// Check whether list number 'list', or any list reachable through its
// morelists, has one of the blanket block flags set that applies to this
// request ('ip' and 'ssl' describe the request). Returns the translated
// message for the first flag that applies, or NULL when none does or when the
// list is outside its time limit.
char *FOptionContainer::testBlanketBlock(unsigned int list, bool ip, bool ssl) {
    if (!o.lm.l[list]->isNow()) {
        return NULL;
    }

    // flags are tested in a fixed order; the first applicable one wins
    if (o.lm.l[list]->blanketblock) {
        return (char*) o.language_list.getTranslation(502);
    }
    if (o.lm.l[list]->blanket_ip_block && ip) {
        return (char*) o.language_list.getTranslation(505);
    }
    if (o.lm.l[list]->blanketsslblock && ssl) {
        return (char*) o.language_list.getTranslation(506);
    }
    if (o.lm.l[list]->blanketssl_ip_block && ssl && ip) {
        return (char*) o.language_list.getTranslation(507);
    }

    // nothing set here: try each list referenced from this one
    for (std::vector<int>::iterator sub = o.lm.l[list]->morelists.begin(); sub != o.lm.l[list]->morelists.end(); ++sub) {
        char *message = testBlanketBlock(*sub, ip, ssl);
        if (message != NULL) {
            return message;
        }
    }
    return NULL;
}

// checkme: there's an awful lot of removing whitespace, PTP, etc. going on here.
// perhaps connectionhandler could keep a suitably modified version handy to prevent repetition of work?

// Look for the host part of 'url' in site list number 'list'.
//
// NOTE: 'url' is taken by reference and is modified in place (lower-cased,
// protocol and path removed, then shortened label by label while searching).
//
// When 'doblanket' is set, the list's blanket block flags are tested first
// (see testBlanketBlock; 'ip' and 'ssl' are passed through to it).
// When reverse_lookups is enabled and the host looks like an IP address, each
// hostname returned by ipToHostname() is searched for before the address is.
// Each candidate is tried whole, then with leading labels removed one at a
// time; finally what is left is retried with a leading dot.
//
// Returns the value from testBlanketBlock()/findInList() on a match, or NULL.
char *FOptionContainer::inSiteList(String &url, unsigned int list, bool doblanket, bool ip, bool ssl) {
    // Perform blanket matching if desired
    if (doblanket) {
        char *r = testBlanketBlock(list, ip, ssl);
        if (r) {
            return r;
        }
    }

    url.removeWhiteSpace(); // just in case of weird browser crap
    url.toLower();
    url.removePTP(); // chop off the ht(f)tp(s)://
    if (url.contains("/")) {
        url = url.before("/"); // chop off any path after the domain
    }
    char *i;
    bool isipurl = isIPHostname(url);
    if (reverse_lookups && isipurl) { // change that ip into hostname
        std::deque<String> *url2s = ipToHostname(url.toCharArray());
        char *found = NULL;
        String url2;
        for (std::deque<String>::iterator j = url2s->begin(); found == NULL && j != url2s->end(); ++j) {
            url2 = *j;
            while (url2.contains(".")) {
                found = (*o.lm.l[list]).findInList(url2.toCharArray());
                if (found != NULL) {
                    break; // exact match
                }
                url2 = url2.after("."); // check for being in hld
            }
        }
        // ipToHostname() allocates the deque with new; release it on every
        // path, including a successful match, before leaving this scope
        delete url2s;
        if (found != NULL) {
            return found;
        }
    }
    while (url.contains(".")) {
        i = (*o.lm.l[list]).findInList(url.toCharArray());
        if (i != NULL) {
            return i; // exact match
        }
        url = url.after("."); // check for being in higher level domains
    }
    if (url.length() > 1) { // allows matching of .tld
        url = "." + url;
        i = (*o.lm.l[list]).findInList(url.toCharArray());
        if (i != NULL) {
            return i; // exact match
        }
    }
    return NULL; // no match anywhere
}

// checkme: remove things like this & make inSiteList/inIPList public?

// Search this group's banned site list for the URL's host.
// Returns inSiteList()'s result unchanged: non-NULL on a match, NULL otherwise.
char *FOptionContainer::inBannedSiteList(String url, bool doblanket, bool ip, bool ssl) {
    char *hit = inSiteList(url, banned_site_list, doblanket, ip, ssl);
    return hit;
}

// True when inSiteList() finds the URL's host in this group's grey site list.
bool FOptionContainer::inGreySiteList(String url, bool doblanket, bool ip, bool ssl) {
    const char *hit = inSiteList(url, grey_site_list, doblanket, ip, ssl);
    return hit != NULL;
}

// True when inSiteList() finds the URL's host in this group's exception site list.
bool FOptionContainer::inExceptionSiteList(String url, bool doblanket, bool ip, bool ssl) {
    const char *hit = inSiteList(url, exception_site_list, doblanket, ip, ssl);
    return hit != NULL;
}

// True when the URL matches either the exception file site list or the
// exception file URL list.
bool FOptionContainer::inExceptionFileSiteList(String url) {
    // inSiteList() takes its argument by reference and cuts it down (path
    // removed, domain labels stripped) while searching, so give it a scratch
    // copy; otherwise the URL list check below would see the mangled string
    // instead of the original URL.
    String siteurl(url);
    if (inSiteList(siteurl, exception_file_site_list) != NULL)
        return true;
    return inURLList(url, exception_file_url_list) != NULL;
}

// look in given URL list for given URL

// Look for 'url' in URL list number 'list'.
//
// NOTE: 'url' is taken by reference and is modified in place (lower-cased,
// protocol removed, path hex-decoded and normalised, trailing slash removed,
// then shortened domain label by domain label while searching).
//
// When 'doblanket' is set, the list's blanket block flags are tested first
// (see testBlanketBlock; 'ip' and 'ssl' are passed through to it).
// When reverse_lookups is enabled, the URL has a path and its host looks like
// an IP address, each hostname returned by ipToHostname() is tried (with the
// same path) before the address itself.
//
// An entry returned by findStartsWith() only counts as a match when it is not
// shorter than the candidate, or when the candidate's next character after the
// entry's length is one of '/', '?', '&' or '=' - so "/blah" matches
// "/blah/foo" and "/blah?foo" but not "/blahfoo".
//
// Returns the value from testBlanketBlock()/findStartsWith() on a match, or NULL.
char *FOptionContainer::inURLList(String &url, unsigned int list, bool doblanket, bool ip, bool ssl) {
    // Perform blanket matching if desired
    if (doblanket) {
        char *r = testBlanketBlock(list, ip, ssl);
        if (r) {
            return r;
        }
    }

    unsigned int fl;
    char *i;
    String foundurl;
#ifdef MIND_DEBUG
    std::cout << "inURLList: " << url << std::endl;
#endif
    url.removeWhiteSpace(); // just in case of weird browser crap
    url.toLower();
    url.removePTP(); // chop off the ht(f)tp(s)://
    if (url.contains("/")) {
        String tpath("/");
        tpath += url.after("/");
        url = url.before("/");
        tpath.hexDecode();
        tpath.realPath();
        url += tpath; // will resolve ../ and %2e2e/ and // etc
    }
    if (url.endsWith("/")) {
        url.chop(); // chop off trailing / if any
    }
#ifdef MIND_DEBUG
    std::cout << "inURLList (processed): " << url << std::endl;
#endif
    if (reverse_lookups && url.after("/").length() > 0) {
        String hostname(url.before("/"));
        if (isIPHostname(hostname)) {
            std::deque<String> *url2s = ipToHostname(hostname.toCharArray());
            char *found = NULL;
            String url2;
            for (std::deque<String>::iterator j = url2s->begin(); found == NULL && j != url2s->end(); ++j) {
                url2 = *j;
                url2 += "/";
                url2 += url.after("/");
                while (url2.before("/").contains(".")) {
                    i = (*o.lm.l[list]).findStartsWith(url2.toCharArray());
                    if (i != NULL) {
                        foundurl = i;
                        fl = foundurl.length();
                        if (url2.length() > fl) {
                            // inspect the candidate that was actually searched
                            // for (url2, not url): the length check above only
                            // guarantees that index fl is valid for url2
                            unsigned char c = url2[fl];
                            if (c == '/' || c == '?' || c == '&' || c == '=') {
                                found = i; // matches /blah/ or /blah/foo
                                // (or /blah?foo etc.)
                                // but not /blahfoo
                                break;
                            }
                        } else {
                            found = i; // exact match
                            break;
                        }
                    }
                    url2 = url2.after("."); // check for being in hld
                }
            }
            // ipToHostname() allocates the deque with new; release it on every
            // path, including a successful match, before leaving this scope
            delete url2s;
            if (found != NULL) {
                return found;
            }
        }
    }
    while (url.before("/").contains(".")) {
        i = (*o.lm.l[list]).findStartsWith(url.toCharArray());
        if (i != NULL) {
            foundurl = i;
            fl = foundurl.length();
#ifdef MIND_DEBUG
            std::cout << "foundurl: " << foundurl << foundurl.length() << std::endl;
            std::cout << "url: " << url << fl << std::endl;
#endif
            if (url.length() > fl) {
                if (url[fl] == '/' || url[fl] == '?' || url[fl] == '&' || url[fl] == '=') {
                    return i; // matches /blah/ or /blah/foo but not /blahfoo
                }
            } else {
                return i; // exact match
            }
        }
        url = url.after("."); // check for being in higher level domains
    }
    return NULL;
}

// Search this group's banned URL list for the URL.
// Returns inURLList()'s result unchanged: non-NULL on a match, NULL otherwise.
char *FOptionContainer::inBannedURLList(String url, bool doblanket, bool ip, bool ssl) {
#ifdef MIND_DEBUG
    std::cout << "inBannedURLList" << std::endl;
#endif
    char *hit = inURLList(url, banned_url_list, doblanket, ip, ssl);
    return hit;
}

// True when inURLList() finds the URL in this group's grey URL list.
bool FOptionContainer::inGreyURLList(String url, bool doblanket, bool ip, bool ssl) {
#ifdef MIND_DEBUG
    std::cout << "inGreyURLList" << std::endl;
#endif
    const char *hit = inURLList(url, grey_url_list, doblanket, ip, ssl);
    return hit != NULL;
}

// True when inURLList() finds the URL in this group's exception URL list.
bool FOptionContainer::inExceptionURLList(String url, bool doblanket, bool ip, bool ssl) {
#ifdef MIND_DEBUG
    std::cout << "inExceptionURLList" << std::endl;
#endif
    const char *hit = inURLList(url, exception_url_list, doblanket, ip, ssl);
    return hit != NULL;
}

// New log-only site lists

// Log-only URL list lookup. Returns NULL when log_url_flag is off or the URL
// is not in the list; otherwise returns the list's lastcategory string.
const char* FOptionContainer::inLogURLList(String url) {
    // the lookup is skipped entirely when the flag is off
    if (log_url_flag && inURLList(url, log_url_list) != NULL) {
        return o.lm.l[log_url_list]->lastcategory.toCharArray();
    }
    return NULL;
}

// Log-only site list lookup. Returns NULL when log_site_flag is off or the
// host is not in the list; otherwise returns the list's lastcategory string.
const char* FOptionContainer::inLogSiteList(String url) {
    // the lookup is skipped entirely when the flag is off
    if (log_site_flag && inSiteList(url, log_site_list) != NULL) {
        return o.lm.l[log_site_list]->lastcategory.toCharArray();
    }
    return NULL;
}

// Log-only regexp URL list lookup. Returns NULL when log_regexpurl_flag is off
// or no expression matches; otherwise returns the category string of the list
// that supplied the matching expression.
const char* FOptionContainer::inLogRegExpURLList(String url) {
    if (!log_regexpurl_flag) {
        return NULL;
    }
    const int match = inRegExpURLList(url, log_regexpurl_list_comp, log_regexpurl_list_ref, log_regexpurl_list);
    if (match != -1) {
        return o.lm.l[log_regexpurl_list_ref[match]]->category.toCharArray();
    }
    return NULL;
}

// TODO: Store the modified URL somewhere, instead of re-processing it every time.

// Check whether the path part of 'url' ends with an entry of list number
// 'list'. The URL is normalised first (whitespace removed, lower-cased,
// hex-decoded, protocol removed) and everything up to the first '/' is
// discarded. Returns findEndsWith()'s result, or NULL when fewer than two
// characters of path remain.
char *FOptionContainer::inExtensionList(unsigned int list, String url) {
    url.removeWhiteSpace(); // just in case of weird browser crap
    url.toLower();
    url.hexDecode();
    url.removePTP(); // drop the ht(f)tp(s):// prefix
    url = url.after("/"); // keep only what follows the domain

    if (url.length() >= 2) {
        return o.lm.l[list]->findEndsWith(url.toCharArray());
    }
    return NULL; // too short to ever match
}

// is this line of the headers in the banned regexp header list?

int FOptionContainer::inBannedRegExpHeaderList(std::deque<String> &header) {

    for (std::deque<String>::iterator k = header.begin(); k != header.end(); k++) {
#ifdef MIND_DEBUG
        std::cout << "inBannedRegExpHeaderList: " << *k << std::endl;
#endif
        unsigned int i = 0;
        for (std::deque<RegExp>::iterator j = banned_regexpheader_list_comp.begin(); j != banned_regexpheader_list_comp.end(); j++) {
            if (o.lm.l[banned_regexpheader_list_ref[i]]->isNow()) {
                j->match(k->toCharArray());
                if (j->matched())
                    return i;
            }
#ifdef MIND_DEBUG
            else
                std::cout << "Outside included regexp list's time limit" << std::endl;
#endif
            i++;
        }
    }
    return -1;
}

// is this URL in the given regexp URL list?

// Test 'url' against the compiled expressions in 'list_comp'.
//
// NOTE: 'url' is taken by reference and is normalised in place (whitespace
// removed, lower-cased, protocol removed, path hex-decoded and resolved,
// trailing slash removed).
//
// 'list' is the top level list, whose time limit gates the whole check;
// 'list_ref[n]' is the list that expression n came from, whose time limit
// gates that one expression.
// Returns the index of the first matching expression, or -1.
int FOptionContainer::inRegExpURLList(String &url, std::deque<RegExp> &list_comp, std::deque<unsigned int> &list_ref, unsigned int list) {
#ifdef MIND_DEBUG
    std::cout << "inRegExpURLList: " << url << std::endl;
#endif
    // check parent list's time limit
    if (!o.lm.l[list]->isNow()) {
#ifdef MIND_DEBUG
        std::cout << "Outside top level regexp list's time limit" << std::endl;
#endif
        return -1;
    }

    url.removeWhiteSpace(); // just in case of weird browser crap
    url.toLower();

    // The protocol prefix is stripped and NOT re-added afterwards. It would be
    // nice for regexes to be able to match it, but it has been assumed for too
    // long that the URL string does not start with one, and regexes that
    // anchor on the start of the string must keep working. An attempt to
    // preserve the prefix was reverted on 2005-12-07.
    url.removePTP();
    if (url.contains("/")) {
        String tpath("/");
        tpath += url.after("/");
        tpath.hexDecode();
        tpath.realPath();
        url = url.before("/");
        url += tpath; // will resolve ../ and %2e2e/ and // etc
    }
    if (url.endsWith("/")) {
        url.chop(); // drop a trailing slash, if any
    }
#ifdef MIND_DEBUG
    std::cout << "inRegExpURLList (processed): " << url << std::endl;
#endif

    for (unsigned int idx = 0; idx < list_comp.size(); ++idx) {
        if (!o.lm.l[list_ref[idx]]->isNow()) {
#ifdef MIND_DEBUG
            std::cout << "Outside included regexp list's time limit" << std::endl;
#endif
            continue;
        }
        list_comp[idx].match(url.toCharArray());
        if (list_comp[idx].matched()) {
            return idx;
        }
    }
    return -1;
}

// use above to check banned/exception RegExp URLs

// Check the URL against the banned regexp URL expressions.
// Returns inRegExpURLList()'s result: the matching expression's index, or -1.
int FOptionContainer::inBannedRegExpURLList(String url) {
#ifdef MIND_DEBUG
    std::cout << "inBannedRegExpURLList" << std::endl;
#endif
    const int match = inRegExpURLList(url, banned_regexpurl_list_comp, banned_regexpurl_list_ref, banned_regexpurl_list);
    return match;
}

// Check the URL against the exception regexp URL expressions.
// Returns inRegExpURLList()'s result: the matching expression's index, or -1.
int FOptionContainer::inExceptionRegExpURLList(String url) {
#ifdef MIND_DEBUG
    std::cout << "inExceptionRegExpURLList" << std::endl;
#endif
    const int match = inRegExpURLList(url, exception_regexpurl_list_comp, exception_regexpurl_list_ref, exception_regexpurl_list);
    return match;
}

// Returns true when 'url' is NOT matched by the isiphost expression.
// precompileregexps() compiles isiphost from ".*[a-z|A-Z].*", so in effect a
// host with no letters in it is treated as an IP address.
bool FOptionContainer::isIPHostname(String url) {
    return !isiphost.match(url.toCharArray());
}

int FOptionContainer::findoptionI(const char *option) {
    int res = String(findoptionS(option).c_str()).toInteger();
    return res;
}

std::string FOptionContainer::findoptionS(const char *option) {
    // findoptionS returns a found option stored in the deque
    String temp;
    String temp2;
    String o(option);
    for (int i = 0; i < (signed) conffile.size(); i++) {
        temp = conffile[i].c_str();
        temp2 = temp.before("=");
        while (temp2.endsWith(" ")) { // get rid of tailing spaces before =
            temp2.chop();
        }
        if (o == temp2) {
            temp = temp.after("=");
            while (temp.startsWith(" ")) { // get rid of heading spaces
                temp.lop();
            }
            if (temp.startsWith("'")) { // inverted commas
                temp.lop();
            }
            while (temp.endsWith(" ")) { // get rid of tailing spaces
                temp.chop();
            }
            if (temp.endsWith("'")) { // inverted commas
                temp.chop();
            }
            return temp.toCharArray();
        }
    }
    return "";
}

// Sanity check a configuration value so problems in the conf files are easier
// to spot. 'l' must be at least 'minl' and, when 'maxl' is positive, at most
// 'maxl' (a non-positive 'maxl' means no upper bound). On failure a message
// naming 'emessage' is logged and false is returned.
bool FOptionContainer::realitycheck(int l, int minl, int maxl, const char *emessage) {
    const bool withinbounds = (l >= minl) && ((maxl <= 0) || (l <= maxl));
    if (withinbounds) {
        return true;
    }
    log.writeToLog(1, "Config problem; check allowed values for %s", emessage);
    return false;
}

// Compile the fixed expressions held as members: pics1, pics2 and isiphost.
// Returns false, after logging which one failed, at the first expression that
// does not compile.
bool FOptionContainer::precompileregexps() {
    bool compiled = pics1.comp("pics-label\"[ \t]*content=[\'\"]([^>]*)[\'\"]");
    if (!compiled) {
        log.writeToLog(1, "%s", "Error compiling RegExp pics1.");
        return false;
    }

    compiled = pics2.comp("[r|{ratings}] *\\(([^\\)]*)\\)");
    if (!compiled) {
        log.writeToLog(1, "%s", "Error compiling RegExp pics2.");
        return false;
    }

    // matches any string containing a character from the class [a-z|A-Z];
    // isIPHostname() treats a non-match as "this is an IP address"
    compiled = isiphost.comp(".*[a-z|A-Z].*");
    if (!compiled) {
        log.writeToLog(1, "%s", "Error compiling RegExp isiphost.");
        return false;
    }

    return true;
}

// True when the host part of 'url' starts with access_denied_domain, i.e. the
// request is for our own web server and should not be filtered. Only checked
// for reporting levels 1 and 2; levels 0 and 3 don't use the CGI.
bool FOptionContainer::isOurWebserver(String url) {
    if (reporting_level != 1 && reporting_level != 2) {
        return false;
    }

    url.removeWhiteSpace(); // just in case of weird browser crap
    url.toLower();
    url.removePTP(); // drop the ht(f)tp(s):// prefix
    if (url.contains("/")) {
        url = url.before("/"); // keep only the domain
    }
    if (url.startsWith(access_denied_domain)) { // don't filter our web server
        return true;
    }
    return false;
}
